# __author__ = 'tianfuzneg'
# !/usr/bin/python
# -*- coding:utf-8 -*-

########################################################################################
# 20230324, use new_ref, minimap2 alignment
########################################################################################
import os
import cairosvg
import genomeview

# Absolute paths to external tools and reference data on the cluster file system.
# samtools executable; interpolated into the generated BAM merge script.
samtools = "/data/fs01/biosoft/samtools-1.9/samtools"
# bedtools 2.30.0 location; not referenced in the visible part of this script.
bedtools = "/data/fs01/wangzf/software/bedtools-2.30.0"
# Sniffles 1.0.11 executable; not referenced in the visible part of this script.
sniffles = "/data/fs01/wangzf/software/Sniffles-master/bin/sniffles-core-1.0.11/sniffles"
# Reference FASTA (presumably hg38 main chromosomes - confirm); not referenced in the visible part of this script.
hg38_fa = "/data/fs09/wangzf/nanopore/ztf/HCC/ref/hg38_mainChr.fa"
# sniffles executable from the `nanoplot` conda environment; used in the generated sniffles2.1 scripts.
sniffles2 = "/data/fs01/wangzf/software/anaconda3/envs/nanoplot/bin/sniffles"
# BED file passed to sniffles via --tandem-repeats.
tr_bed = "/data/fs09/wangzf/nanopore/ztf/HCC/ref/sniffles/human_GRCh38_no_alt_analysis_set.trf.bed"

# Sample identifiers have the form "<patient>_<region>". The table below lists,
# per patient and in processing order, the regions that were sequenced.
# Entries containing "WBC" are handled as the blood sample of the patient by
# the loops further down; every other entry is handled as a tumor-side sample.
_regions_by_patient = (
    ('HCC8', ('WBC', 'N1', 'N3', 'T1', 'T2', 'T3', 'T4', 'T5')),
    ('HCC9', ('WBC', 'N1', 'N3', 'T1', 'T2', 'T3', 'T4', 'T5')),
    ('HCC10', ('WBC', 'N1', 'N3', 'N5', 'T1', 'T2', 'T3', 'T4', 'T5', 'T6', 'T7')),
    ('HCC12', ('WBC', 'N1', 'N3', 'T1', 'T2', 'T3', 'T4', 'T5')),
    ('HCC13', ('WBC', 'N1', 'T1', 'T2', 'T3', 'T4', 'T5')),
)
sample_list = ['%s_%s' % (patient, region)
               for patient, regions in _regions_by_patient
               for region in regions]

# Hand-picked subset of samples (one per patient, two for HCC9); not referenced
# in the visible part of this script.
sample_n50_list = [
    'HCC8_T4',
    'HCC9_N3',
    'HCC9_T2',
    'HCC10_N5',
    'HCC12_T3',
    'HCC13_T2',
]

# Presumably the root of the ONT analysis tree (confirm); not referenced in the visible part of this script.
work_dir = "/data/fs09/wangzf/nanopore/ztf/HCC/ONT"
# Directory holding the helper shell scripts (ONT_bam_add_tag_<tag>.sh).
program_dir = '/data/fs09/wangzf/nanopore/ztf/HCC/ONT/program/'
# Per-sample output root: tagged/merged BAMs, generated scripts and sniffles
# output are written under <somatic_dir_fs08>/<sample>/.
somatic_dir_fs08 = "/data/fs08/wangzf/nanopore/ztf/HCC/ONT/Somatic_3.0"

########################################################################################
# Somatic pre-process
########################################################################################
# add tag
# Submit one tagging job per sample with qsub. Each job is pinned to a host
# named PMC-<id>; hosts are assigned round-robin from IDs.
IDs = [31, 32, 33, 34, 36, 38, 39, 42, 43, 46, 50]
for i, sampleid in enumerate(sample_list):
    somatic_sample_dir = os.path.join(somatic_dir_fs08, sampleid)
    os.makedirs(somatic_sample_dir, exist_ok=True)

    # WBC samples use the "blood" tagging script, every other sample the "tumor" one.
    tag = 'blood' if 'WBC' in sampleid else 'tumor'
    script_tag = os.path.join(program_dir, 'ONT_bam_add_tag_%s.sh' % tag)

    # Input: sorted minimap2 BAM of the sample. Output: tagged BAM in the sample's somatic directory.
    bam_sort = os.path.join('/data/fs08/wangzf/nanopore/ztf/HCC/ONT/HBV_minimap2',
                            'output', sampleid, "%s_minimap2_sorted.bam" % sampleid)
    bam_sort_tag = os.path.join(somatic_sample_dir, '%s_minimap2_newref_sorted_tag.bam' % sampleid)
    stdout = os.path.join(somatic_sample_dir, '%s_add_tag.o' % sampleid)
    stderr = os.path.join(somatic_sample_dir, '%s_add_tag.e' % sampleid)

    # Wrap on the list length instead of a hard-coded index so that editing IDs
    # can never leave a host unused or run past the end of the list.
    server = IDs[i % len(IDs)]

    # The tagging script receives input BAM, output BAM and sample id as positional arguments.
    os.system(
        'qsub -l hostname=PMC-{server} -S /bin/bash -o {out} -e {err} -N {name} -cwd {script} {a} {b} {c}'.format(
            server=server, out=stdout, err=stderr, name="t%s" % sampleid.replace('HCC', 'H'),
            script=script_tag, a=bam_sort, b=bam_sort_tag, c=sampleid))

# merge bam: write (but do not submit) one merge script per non-blood sample.
# Each script merges the tagged blood (WBC) BAM of the same patient with the
# tagged BAM of the sample and then indexes the merged BAM.
for sampleid in sample_list:
    if 'WBC' in sampleid:
        continue

    # sniffles v2.0 output directory: wiped and recreated on every run.
    # makedirs also creates the per-sample parent directory the script is written to.
    sniffles2_sample_dir = os.path.join(somatic_dir_fs08, sampleid, 'sniffles2')
    if os.path.exists(sniffles2_sample_dir):
        os.system("rm -rf %s" % sniffles2_sample_dir)
    os.makedirs(sniffles2_sample_dir)

    # Tagged inputs: the sample itself and the WBC sample sharing its patient prefix.
    bam_sort_tag_tumor = os.path.join(somatic_dir_fs08, sampleid, '%s_minimap2_newref_sorted_tag.bam' % sampleid)
    sampleid_blood = '%s_WBC' % sampleid.split('_')[0]
    bam_sort_tag_blood = os.path.join(somatic_dir_fs08, sampleid_blood,
                                      '%s_minimap2_newref_sorted_tag.bam' % sampleid_blood)
    bam_merge = os.path.join(somatic_dir_fs08, sampleid, '%s_minimap2_newref_sorted_merge.bam' % sampleid)

    # NOTE(review): the merge command has no -f, so samtools will presumably refuse
    # to overwrite an existing merged BAM when the script is re-run - confirm.
    script_lines = [
        "#! /bin/bash",
        '''echo "$(date) 1. Start to merge bam: %s" ''' % sampleid,
        # The header of the merged BAM is taken from the blood BAM (-h).
        "{samtools} merge -@ 10 -h {bam_b} {out_bam} {bam_b} {bam_t}".format(
            samtools=samtools, bam_b=bam_sort_tag_blood, bam_t=bam_sort_tag_tumor, out_bam=bam_merge),
        # Uses the shared samtools constant rather than a second hard-coded copy of the path.
        "{samtools} index -@ 10 {out_bam} ".format(samtools=samtools, out_bam=bam_merge),
        '''echo "$(date) 1. Finish to merge bam: %s" ''' % sampleid,
    ]

    script_ms = os.path.join(somatic_dir_fs08, sampleid, '%s_merge_sniffles2.sh' % sampleid)
    with open(script_ms, 'w') as out:
        out.write('\n'.join(script_lines) + '\n')


# Submit the previously written merge scripts with qsub, for the samples listed
# in the loop below. Jobs are pinned to hosts PMC-<id>, assigned round-robin from IDs.
i = 0
IDs = [43, 31, 32, 33, 34, 36, 38, 39, 42, 43, 46, 48, 50]
for sampleid in ['HCC12_T1']:
    if 'WBC' in sampleid:
        continue

    script_ms = os.path.join(somatic_dir_fs08, sampleid, '%s_merge_sniffles2.sh' % sampleid)
    stdout = script_ms.replace(".sh", ".o")
    stderr = script_ms.replace(".sh", ".e")

    # Drop log files left over from an earlier submission of the same script.
    for std in (stdout, stderr):
        if os.path.isfile(std):
            os.remove(std)

    # Wrap on the list length: the former hard-coded "i <= 11" wrapped one entry
    # too early for this 13-element list, so the last host was never selected.
    server = IDs[i % len(IDs)]
    os.system(
        'qsub -l hostname=PMC-{server} -S /bin/bash -o {out} -e {err} -N {name} -cwd {script}'.format(
            server=server, out=stdout, err=stderr, name="ms%s" % sampleid.replace('HCC', 'H'),
            script=script_ms))
    i += 1

# sniffles change mapq, minsvlen, min-alignment-length
# Write (but do not submit) one sniffles script per non-blood sample. The script
# runs sniffles on the merged BAM of the sample and writes the VCF into the
# sample's "sniffles2.1" directory.
for sampleid in sample_list:
    if 'WBC' in sampleid:
        continue

    # Output directory of this parameter set: wiped and recreated on every run.
    sniffles2_sample_dir = os.path.join(somatic_dir_fs08, sampleid, 'sniffles2.1')
    if os.path.exists(sniffles2_sample_dir):
        os.system("rm -rf %s" % sniffles2_sample_dir)
    os.makedirs(sniffles2_sample_dir)

    sv_vcf2 = os.path.join(sniffles2_sample_dir, '%s_merge_sniffles2.vcf' % sampleid)
    # Merged BAM produced by the sample's merge script; the only input sniffles needs here.
    bam_merge = os.path.join(somatic_dir_fs08, sampleid, '%s_minimap2_newref_sorted_merge.bam' % sampleid)

    script_lines = [
        "#! /bin/bash",
        '''echo "$(date) 2. Start to sniffles2: %s" ''' % sampleid,
        # Put the anaconda install on PATH and activate the env that provides sniffles.
        '''export PATH="/data/fs01/wangzf/software/anaconda3/bin:$PATH" ''',
        "source activate nanoplot ",
        "{sniffles} -i {bam_sort} -v {vcf} --tandem-repeats {tr} -t 36 --minsupport 1 --mapq 20 "
        "--min-alignment-length 500 --minsvlen 50 "
        "--output-rnames --allow-overwrite".format(
            sniffles=sniffles2, bam_sort=bam_merge, vcf=sv_vcf2, tr=tr_bed),
        '''echo "$(date) 2. Finish to sniffles2: %s" ''' % sampleid,
    ]

    script_ms = os.path.join(sniffles2_sample_dir, '%s_sniffles2.1.sh' % sampleid)
    with open(script_ms, 'w') as out:
        out.write('\n'.join(script_lines) + '\n')

# Submit the sniffles2.1 script of every non-blood sample with qsub. Jobs are
# pinned to hosts PMC-<id>, assigned round-robin from IDs.
i = 0
IDs = [32, 33, 34, 36, 38, 39, 40, 41, 42, 43, 44, 46, 48, 50]
for sampleid in sample_list:
    if 'WBC' in sampleid:
        continue

    sniffles2_sample_dir = os.path.join(somatic_dir_fs08, sampleid, 'sniffles2.1')
    script_ms = os.path.join(sniffles2_sample_dir, '%s_sniffles2.1.sh' % sampleid)
    stdout = script_ms.replace(".sh", ".o")
    stderr = script_ms.replace(".sh", ".e")

    # Drop log files left over from an earlier submission of the same script.
    for std in (stdout, stderr):
        if os.path.isfile(std):
            os.remove(std)

    # Only submitted (non-blood) samples advance the counter; wrapping on the
    # list length keeps host selection correct whenever IDs is edited.
    server = IDs[i % len(IDs)]
    os.system(
        'qsub -l hostname=PMC-{server} -S /bin/bash -o {out} -e {err} -N {name} -cwd {script}'.format(
            server=server, out=stdout, err=stderr, name="ms%s" % sampleid.replace('HCC', 'H'),
            script=script_ms))
    i += 1